In [5]:
%load_ext autoreload
%matplotlib nbagg
%autoreload 2
import copy

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

MECA653: Traitement de donnée - Analyse de la base de donnée de la sécurité routière

L'objectif ici est d'analyser les données fournies par le ministère de l'intérieure sur les accidents de la route resencés en 2016.

Le Module Panda sera largement utilisé.

Sources

Lien vers data.gouv.fr : https://www.data.gouv.fr/fr/datasets/base-de-donnees-accidents-corporels-de-la-circulation/#_

Documentation de la base de donnée : DATA/Description_des_bases_de_donneesONISR-Annees_2005_a_2016.pdf

1 - Charger les bases de donnée


In [66]:
dfc = pd.read_csv('./DATA/caracteristiques_2016.csv') 
dfu = pd.read_csv('./DATA/usagers_2016.csv')
dfl = pd.read_csv('./DATA/lieux_2016.csv')
df = pd.concat([dfu, dfc, dfl], axis=1)


C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:2698: DtypeWarning: Columns (2) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [71]:
dfc.tail()


Out[71]:
Num_Acc an mois jour hrmn lum agg int atm col com adr gps lat long dep
59427 201600059428 16 12 1 1201 1 2 1 1 6.0 611 passamanty-Vahib‚ ( rout NaN NaN NaN 976
59428 201600059429 16 12 3 2215 1 2 1 1 7.0 611 mamoudzou centre-Adrian NaN NaN NaN 976
59429 201600059430 16 12 5 1710 1 2 1 1 3.0 611 kaweni-Kaweni sud (Z I) NaN NaN NaN 976
59430 201600059431 16 12 6 1005 1 2 3 1 3.0 611 kaweni-Kaweni nord ( Z I NaN NaN NaN 976
59431 201600059432 16 12 24 635 1 2 6 1 3.0 611 kaweni-Kaweni sud (Z I) NaN NaN NaN 976

In [65]:
dfu.head()


Out[65]:
Num_Acc place catu grav sexe trajet secu locp actp etatp an_nais num_veh
0 201600000001 1.0 1 1 2 0.0 11.0 0.0 0.0 0.0 1983.0 B02
1 201600000001 1.0 1 3 1 9.0 21.0 0.0 0.0 0.0 2001.0 A01
2 201600000002 1.0 1 3 1 5.0 11.0 0.0 0.0 0.0 1960.0 A01
3 201600000002 2.0 2 3 1 0.0 11.0 0.0 0.0 0.0 2000.0 A01
4 201600000002 3.0 2 3 2 0.0 11.0 0.0 0.0 0.0 1962.0 A01

In [70]:
dfl.tail()


Out[70]:
Num_Acc catr voie v1 v2 circ nbv pr pr1 vosp prof plan lartpc larrout surf infra situ env1
59427 201600059428 3 3 2.0 NaN 2.0 0.0 0.0 842.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 99.0
59428 201600059429 2 2 2.0 NaN 2.0 0.0 0.0 50.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 99.0
59429 201600059430 2 1 2.0 NaN 2.0 0.0 1.0 50.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 99.0
59430 201600059431 2 1 2.0 NaN 2.0 0.0 3.0 438.0 0.0 1.0 4.0 0.0 0.0 1.0 0.0 1.0 99.0
59431 201600059432 2 1 2.0 NaN 2.0 0.0 1.0 157.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 99.0

In [68]:
df.head()


Out[68]:
Num_Acc place catu grav sexe trajet secu locp actp etatp ... pr1 vosp prof plan lartpc larrout surf infra situ env1
0 201600000001 1.0 1 1 2 0.0 11.0 0.0 0.0 0.0 ... NaN 0.0 1.0 3.0 0.0 0.0 1.0 0.0 1.0 0.0
1 201600000001 1.0 1 3 1 9.0 21.0 0.0 0.0 0.0 ... NaN 0.0 1.0 2.0 0.0 58.0 1.0 0.0 1.0 0.0
2 201600000002 1.0 1 3 1 5.0 11.0 0.0 0.0 0.0 ... NaN 0.0 1.0 3.0 0.0 68.0 2.0 0.0 3.0 99.0
3 201600000002 2.0 2 3 1 0.0 11.0 0.0 0.0 0.0 ... NaN 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 99.0
4 201600000002 3.0 2 3 2 0.0 11.0 0.0 0.0 0.0 ... NaN 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 3.0

5 rows × 46 columns


In [63]:
df = pd.concat([df, dfl], axis=1)
df.head()


Out[63]:
Num_Acc place catu grav sexe trajet secu locp actp etatp ... pr1 vosp prof plan lartpc larrout surf infra situ env1
0 201600000001 1.0 1 1 2 0.0 11.0 0.0 0.0 0.0 ... NaN 0.0 1.0 3.0 0.0 0.0 1.0 0.0 1.0 0.0
1 201600000001 1.0 1 3 1 9.0 21.0 0.0 0.0 0.0 ... NaN 0.0 1.0 2.0 0.0 58.0 1.0 0.0 1.0 0.0
2 201600000002 1.0 1 3 1 5.0 11.0 0.0 0.0 0.0 ... NaN 0.0 1.0 3.0 0.0 68.0 2.0 0.0 3.0 99.0
3 201600000002 2.0 2 3 1 0.0 11.0 0.0 0.0 0.0 ... NaN 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 99.0
4 201600000002 3.0 2 3 2 0.0 11.0 0.0 0.0 0.0 ... NaN 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 3.0

5 rows × 64 columns

2 - Quelle est la poportion Homme/Femme impliquée dans les accidents ? Représenter le résultat sous forme graphique.


In [33]:
# methode pas propre
(h,c)=df[df.sexe==1].shape
(f,c)=df[df.sexe==2].shape

(t,c)=df.shape

print('h/t=', h/t)
print('f/t=', f/t)


h/t= 0.6999145568197149
f/t= 0.3000854431802851

In [34]:
# methode panda
df["sexe"].value_counts(normalize=True)


Out[34]:
1    0.699915
2    0.300085
Name: sexe, dtype: float64

In [55]:
fig = plt.figure()
df[df.grav==2].sexe.value_counts(normalize=True).plot.pie(labels=['Homme', 'Femme'], colors= ['r', 'g'], autopct='%.2f')


Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0xe859748>

2 - Quelle est la poportion des accidents ayant eu lieu le jour, la nuit ou a l'aube/crépuscule? Représenter le résultat sous forme graphique.


In [46]:
dlum = df["lum"].value_counts(normalize=True)
dlum = dlum.sort_index()

In [47]:
dlum


Out[47]:
1.0    0.682763
2.0    0.060035
3.0    0.085627
4.0    0.008548
5.0    0.163027
Name: lum, dtype: float64

In [57]:
dlum[3] = dlum[3:5].sum()
fig = plt.figure()
dlum[1:3].plot.pie(labels=['Jour','Aube/crépuscule', 'Nuit'], colors= ['y', 'g' , 'b'], autopct='%.2f')


Out[57]:
<matplotlib.axes._subplots.AxesSubplot at 0xe918978>

3- Position géographique


In [109]:
df.lat=df.lat/100000
df.long=df.long/100000
dfp = df[df.gps=='M']
dfp = dfp[['lat','long']]
dfp = dfp[(dfp.long!=0.0) & (dfp.lat!=0.0)]
dfp.head()


Out[109]:
lat long
21 5.084579e-09 2.264070e-07
50 5.068000e-09 2.793000e-07
51 5.064100e-09 2.712000e-07
55 5.070800e-09 2.765000e-07
57 5.070266e-09 2.620940e-07

In [110]:
#fig = plt.figure()
dfp.plot.scatter(x='long', y='lat',s=1);



In [111]:
df[(df.long!=0.0) & (df.lat!=0.0) & (df.gps=='M')].plot.scatter(x='long', y='lat',s=.5);



In [ ]: